{
  "cells": [
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "collapsed": false
      },
      "outputs": [],
      "source": [
        "%matplotlib inline"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {},
      "source": [
        "\n# Test with permutations the significance of a classification score\n\n\nIn order to test if a classification score is significative a technique\nin repeating the classification procedure after randomizing, permuting,\nthe labels. The p-value is then given by the percentage of runs for\nwhich the score obtained is greater than the classification score\nobtained in the first place.\n\n\n"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "collapsed": false
      },
      "outputs": [],
      "source": [
        "# Author:  Alexandre Gramfort <alexandre.gramfort@inria.fr>\n# License: BSD 3 clause\n\nprint(__doc__)\n\nimport numpy as np\nimport matplotlib.pyplot as plt\n\nfrom sklearn.svm import SVC\nfrom sklearn.model_selection import StratifiedKFold\nfrom sklearn.model_selection import permutation_test_score\nfrom sklearn import datasets\n\n\n# #############################################################################\n# Loading a dataset\niris = datasets.load_iris()\nX = iris.data\ny = iris.target\nn_classes = np.unique(y).size\n\n# Some noisy data not correlated\nrandom = np.random.RandomState(seed=0)\nE = random.normal(size=(len(X), 2200))\n\n# Add noisy data to the informative features for make the task harder\nX = np.c_[X, E]\n\nsvm = SVC(kernel='linear')\ncv = StratifiedKFold(2)\n\nscore, permutation_scores, pvalue = permutation_test_score(\n    svm, X, y, scoring=\"accuracy\", cv=cv, n_permutations=100, n_jobs=1)\n\nprint(\"Classification score %s (pvalue : %s)\" % (score, pvalue))\n\n# #############################################################################\n# View histogram of permutation scores\nplt.hist(permutation_scores, 20, label='Permutation scores',\n         edgecolor='black')\nylim = plt.ylim()\n# BUG: vlines(..., linestyle='--') fails on older versions of matplotlib\n# plt.vlines(score, ylim[0], ylim[1], linestyle='--',\n#          color='g', linewidth=3, label='Classification Score'\n#          ' (pvalue %s)' % pvalue)\n# plt.vlines(1.0 / n_classes, ylim[0], ylim[1], linestyle='--',\n#          color='k', linewidth=3, label='Luck')\nplt.plot(2 * [score], ylim, '--g', linewidth=3,\n         label='Classification Score'\n         ' (pvalue %s)' % pvalue)\nplt.plot(2 * [1. / n_classes], ylim, '--k', linewidth=3, label='Luck')\n\nplt.ylim(ylim)\nplt.legend()\nplt.xlabel('Score')\nplt.show()"
      ]
    }
  ],
  "metadata": {
    "kernelspec": {
      "display_name": "Python 3",
      "language": "python",
      "name": "python3"
    },
    "language_info": {
      "codemirror_mode": {
        "name": "ipython",
        "version": 3
      },
      "file_extension": ".py",
      "mimetype": "text/x-python",
      "name": "python",
      "nbconvert_exporter": "python",
      "pygments_lexer": "ipython3",
      "version": "3.6.9"
    }
  },
  "nbformat": 4,
  "nbformat_minor": 0
}